clear all
set more off
capture log close

*to run these files, you will need to replace the following path with the corresponding path from your own computer

global mainpath "~/Dropbox/Maintenance/cleaning_experiment_full_paper/Replication_Folder"
cd $mainpath

* paths
global path_graphs "graphs"
global path_stata_graphs "stata_graphs"
global path_supplementary_graphs "supplementary_graphs"
global path_tables "tables"
global path_data "data"

set scheme s1mono

***********************
***** analysis *****
***********************

use $path_data/data_tw_anonymized_may_2023, clear

* water available
bys project: tab water_available
gen water_available_d = (water_available == "yes") if water_available != ""

*gen pooled training treatment dummy

gen training_treatment_pooled = training_treatment_1 + training_treatment_2
label var  training_treatment_pooled "Training, any"

*check for effects on water availability

reg water_available_d training_treatment_1 training_treatment_2, vce(hc3)

test training_treatment_1 = training_treatment_2
estadd scalar p_1vs2 = r(p)
estadd scalar control_mean = _b[_cons]

estimates store e_function_1

reg water_available_d training_treatment_pooled, vce(hc3)
estadd scalar control_mean = _b[_cons]

estimates store e_function_2

*Make Supplementary Table 2

esttab e_function_* using "$path_tables/function_r2.tex", replace booktabs keep(training_treatment*) cell(b(star fmt(%04.2f)) se(par fmt(%04.2f))) label nogap number starlevels(* 0.1 ** 0.05 *** 0.01) 	nomtitle collabels(none) stats(p_1vs2 control_mean, layout(@) fmt(%05.3f %04.2f) labels("T vs T \$+\$ S: \$ p \$ value" "Control mean"))  mgroups("Functioning", pattern(1 0) span prefix(\multicolumn{@span}{c}{) suffix(}))

*estimate treatment effects on water quality

gen ecoli_present = (ecoli_mpn > 0) if ecoli_mpn != .
gen tot_coli_present = (tot_coli_mpn > 0) if tot_coli_mpn != .

gen asinh_ecoli = asinh(ecoli_mpn)
gen asinh_tot_coli = asinh(tot_coli_mpn)

*run regressions

local i = 1

foreach v in ecoli_present tot_coli_present asinh_ecoli asinh_tot_coli {
	
	reg `v' training_treatment_1 training_treatment_2, vce(hc3) level(90)

	test training_treatment_1 = training_treatment_2
	estadd scalar p_1vs2 = r(p)
	estadd scalar control_mean = _b[_cons]


	estimates store e_main_`i'
	local i = `i' + 1

	reg `v' training_treatment_pooled, vce(hc3) level(90)
	
	estadd scalar control_mean = _b[_cons]


	estimates store e_main_`i'
	local i = `i' + 1

}

*Make Supplementary Table 9

esttab e_main_* using "$path_tables/main_results_2.tex", replace booktabs keep(training_treatment*) cell(b(star fmt(%04.2f)) se(par fmt(%04.2f))) label nogap number starlevels(* 0.1 ** 0.05 *** 0.01) 	nomtitle collabels(none) stats(p_1vs2 control_mean, layout(@) fmt(%05.3f %04.2f) labels("T vs T \$+\$ S: \$ p \$ value" "Control mean")) mgroups("\emph{E. coli} present" "TC present""\emph{E. coli} (MPN)" "TC (MPN)", pattern(1 0 1 0 1 0 1 0) span prefix(\multicolumn{@span}{c}{) suffix(}))

*users 

gen asinh_users_N_reported = asinh(users_num_reported)
gen asinh_users_N_observed = asinh(users_num_observed)
	
*run regressions

local i = 1

foreach v in asinh_users_N_reported asinh_users_N_observed {
	
	reg `v' training_treatment_1 training_treatment_2, vce(hc3) level(90)

	test training_treatment_1 = training_treatment_2
	estadd scalar p_1vs2 = r(p)
	estadd scalar control_mean = _b[_cons]


	estimates store e_users_`i'
	local i = `i' + 1

	reg `v' training_treatment_pooled, vce(hc3) level(90)
	
	estadd scalar control_mean = _b[_cons]

	estimates store e_users_`i'
	local i = `i' + 1

}
	
*Make Supplementary Table 5
	
esttab e_users_* using "$path_tables/users_r2.tex", replace booktabs keep(training_treatment*) cell(b(star fmt(%04.2f)) se(par fmt(%04.2f))) label nogap number starlevels(* 0.1 ** 0.05 *** 0.01) 	nomtitle collabels(none) stats(p_1vs2 control_mean, layout(@) fmt(%05.3f %04.2f) labels("T vs T \$+\$ S: \$ p \$ value" "Control mean")) mgroups("Reported" "Observed" , pattern(1 0 1 0) span prefix(\multicolumn{@span}{c}{) suffix(}))

*save results in macros for graphs: means and confidence intervals in matrices, p values in locals

local i = 1

foreach v in ecoli_present tot_coli_present asinh_ecoli asinh_tot_coli {
	
	mat `v'_g = J(4,4,.)
	
	estimates restore e_main_`i'
	
	mat `v'_g[1,1] = 1
	mat `v'_g[1,2] = _b[_cons]
	mat `v'_g[1,3] = _b[_cons] - 1.645 * _se[_cons]
	mat `v'_g[1,4] = _b[_cons] + 1.645 * _se[_cons]
	
	forvalues j = 1(1)2 {
 	
		lincom _cons + training_treatment_`j', level(90)
		
		local k = `j' + 1
	
		mat `v'_g[`k',1] = `k' + 1
		mat `v'_g[`k',2] = r(estimate)
		mat `v'_g[`k',3] = r(lb)
		mat `v'_g[`k',4] = r(ub)
		
		test training_treatment_`j' 
		local `v'_p_t`j' = r(p)

	}
	
	test training_treatment_1 = training_treatment_2 
	local `v'_p_t1vst2 = r(p)
	
	local i = `i' + 1
	estimates restore e_main_`i'
	
	lincom _cons + training_treatment_pooled, level(90)
		
	local k = 4
	
	mat `v'_g[`k',1] = 6
	mat `v'_g[`k',2] = r(estimate)
	mat `v'_g[`k',3] = r(lb)
	mat `v'_g[`k',4] = r(ub)
	
	test training_treatment_pooled
	local `v'_p_t = r(p)
		
	local i = `i' + 1

}

*plot subgraphs 

foreach v in ecoli_present tot_coli_present asinh_ecoli asinh_tot_coli {
	
	if "`v'" == "ecoli_present" {
		
		local h1 0.35
		local h1_l 0.34
		local h1_u 0.37
		
		local h2 0.4
		local h2_l 0.39
		local h2_u 0.42
		
		local h3 0.45
		local h3_l 0.44
		local h3_u 0.47

		local ys "-0.03 0.49"
		local yl "0(0.1)0.3"
		
		local yvar "Share of wells"
		local subtitle "b) Presence of {it:E. coli}, 17 months"
	
	}
	
	if "`v'" == "tot_coli_present" {
		
		local h1 1.0
		local h1_l 0.98
		local h1_u 1.03
		
		local h2 1.1
		local h2_l 1.08
		local h2_u 1.13

		local h3 1.2
		local h3_l 1.18
		local h3_u 1.23
		
		local ys "0 1.25"
		local yl "0(0.2)0.8"
		
		local yvar "Share of wells"
		local subtitle "d) Presence of coliform bacteria, 17 months"
		
	}
	
	if "`v'" == "asinh_ecoli" {
		
		local h1 1.65
		local h1_l 1.62
		local h1_u 1.69
		
		local h2 1.75
		local h2_l 1.72
		local h2_u 1.79

		local h3 1.85
		local h3_l 1.82
		local h3_u 1.89
		
		local ys "-0.15 0.85"
		local yl `" 0 "0" 0.881 "1" 1.444 "2" 1.818 "3" "'
		
		
		
		local yvar "E. coli (MPN)"
		local subtitle "b) Concentration of {it:E. coli}, 17 months"

	
	}
	
	if "`v'" == "asinh_tot_coli" {
		
		local h1 4.6
		local h1_l 4.52
		local h1_u 4.75
		
		local h2 4.9
		local h2_l 4.82
		local h2_u 5.05

		local h3 5.2
		local h3_l 5.12
		local h3_u 5.35
		
		local ys "0 5.4"
		local yl `" 0 "0"  0.881 "1" 1.444 "2" 2.095 "4" 2.776 "8" 3.467 "16" 4.159 "32" 4.852 "64" "'
		
		local yvar "Total coliforms (MPN)"
		local subtitle "d) Concentration of total coliform bacteria, 17 months"

	
	}
	
	local b1 1 
	local b2_l 2.9
	local b2_r 3.1
	local b3 4
	local b4 6
	
	local b_p1 = 0.5 * (`b1' + `b2_l')
	local b_p2 = 0.5 * (`b2_r' + `b3')
	local b_p3 = 0.5 * (`b1' + `b3')
	local b_p4 = 0.5 * (`b1' + `b4')
	
	local brackets "(scatteri `h1' `b1' `h1' `b2_l',  recast(line) lw(medthin)  mc(none) lc(black) lp(solid)) (scatteri `h1' `b1' `h1' `b2_l',  recast(dropline) base(`h1_l') lw(medthin) mc(none) lc(black) lp(solid)) (scatteri `h1' `b2_r' `h1' `b3',  recast(line) lw(medthin)  mc(none) lc(black) lp(solid)) (scatteri `h1' `b2_r' `h1' `b3',  recast(dropline) base(`h1_l') lw(medthin) mc(none) lc(black) lp(solid)) (scatteri `h2' `b1' `h2' `b3',  recast(line) lw(medthin)  mc(none) lc(black) lp(solid))   (scatteri `h2' `b1' `h2' `b3',  recast(dropline) base(`h2_l') lw(medthin) mc(none) lc(black) lp(solid)) (scatteri `h3' `b1' `h3' `b4', recast(line) lw(medthin)  mc(none) lc(black) lp(solid))   (scatteri `h3' `b1' `h3' `b4', recast(dropline) base(`h3_l') lw(medthin) mc(none) lc(black) lp(solid)) "
	
	foreach p in `v'_p_t1 `v'_p_t2 `v'_p_t1vst2 `v'_p_t {
	
		if ``p'' < 0.001 {
			local `p'_f = "p < 0.001***"
		} 
		
		else if ``p'' < 0.01 {
			local `p'_f = "p = " + strofreal(``p'',"%05.3f") + "***"
		} 

		else if ``p'' < 0.05 {
			local `p'_f = "p = " + strofreal(``p'',"%05.3f") + "**"
		} 

		else if ``p'' < 0.1 {
			local `p'_f = "p = " + strofreal(``p'',"%05.3f") + "*"
		} 

		else local `p'_f = "p = " + strofreal(``p'',"%05.3f")	
	
	}
	
	local pvals "text(`h1_u' `b_p1' `"``v'_p_t1_f'"', placement(c) justification(center) size(small)) text(`h1_u' `b_p2' `"``v'_p_t1vst2_f'"', placement(c) justification(center) size(small)) text(`h2_u' `b_p3' `"``v'_p_t2_f'"', placement(c) justification(center) size(small)) text(`h3_u' `b_p4' `"``v'_p_t_f'"', placement(c) justification(center) size(small))  "
	
	
	capture drop `v'_g*
	svmat `v'_g
	
	twoway (bar `v'_g2 `v'_g1 if `v'_g1 == 1, bcolor(ebblue*0.75) barwidth(0.5)) (bar `v'_g2 `v'_g1 if `v'_g1 == 3, bcolor(orange*0.5) barwidth(0.5)) (bar `v'_g2 `v'_g1 if `v'_g1 == 4, bcolor(orange*0.75) barwidth(0.5)) (bar `v'_g2 `v'_g1 if `v'_g1 == 6, bcolor(orange*1.25) barwidth(0.5)) (rcap `v'_g3 `v'_g4 `v'_g1) `brackets', `pvals' legend(off) xlabel(1 "Control" 3 "Training" 4 `" "Training" "+ supplies" "' 6 `" "Training," "pooled" "', labsize(small) tlength(0)) subtitle("`subtitle'", size(small) position(11) span) xtitle("") ytitle("`yvar'", size(small)) xsize(6) ysize(8) yscale(range(`ys') lstyle(none)) ylabel(`yl', labsize(small) ) yline(0) xscale(range(0.5 6.5) lstyle(none) ) plotregion(style(none)) name(`v'_results_round2, replace)

	graph save $path_stata_graphs/`v'_results_round2, replace

}

*Bleach 
 
*caretakers for only 7 wells have bleach powder, 2 in training only, 5 in training and supplies

tab training_treatment bleach

*of which almost all, including one of the non-supply group, are using project supplies

tab training_treatment bleach_project_yn 	

*only two report having bought the bleach

tab bleach_bought_yn

tab bleach_bought_q

tab bleach_remaining_q

tab bleach_weight

*analyze caretaker data

use $path_data/data_caretakers_anonymized_may_2023, clear

*keep only functioning wells 

keep if water_available == "yes"

*count obs

unique tw if training_treatment == "control"
*51 caretaker surveys from 26 wells; one well with only one caretaker survey

unique tw if training_treatment == "training_and_supplies"
*52 caretaker surveys from 27 wells; two wells with only one caretaker survey

unique tw if training_treatment == "training_only"
*57 caretaker surveys from 30 wells; three wells with only one caretaker survey

*weight by no. of caretaker surveys/well

bys tw: egen n_caretaker_surveys = count(respondent)	
gen weight = 1/n_caretaker_surveys

*regressions

local i = 1

foreach v in  knowledge_brush knowledge_scrub knowledge_wash {
	
	reg `v' training_treatment_1 training_treatment_2 [pweight = weight], cluster(tw) level(90)

	test training_treatment_1 = training_treatment_2
	estadd scalar p_1vs2 = r(p)
	estadd scalar control_mean = _b[_cons]

	estimates store e_knowledge_`i'
	local i = `i' + 1

}

*save results in macros for graphs: means and confidence intervals in matrices, p values in locals

mat knowledge_g = J(9,4,.)

local i = 1

foreach v in knowledge_wash knowledge_scrub knowledge_brush {
	
	estimates restore e_knowledge_`i'
	
	mat knowledge_g[3*`i'-2,1] = 4*`i'-3
	
	mat knowledge_g[3*`i'-2,2] = _b[_cons]
	mat knowledge_g[3*`i'-2,3] = _b[_cons] - 1.645 * _se[_cons]
	mat knowledge_g[3*`i'-2,4] = _b[_cons] + 1.645 * _se[_cons]
	
	forvalues j = 1(1)2 {
 	
		lincom _cons + training_treatment_`j', level(90)
			
		mat knowledge_g[3*`i'-2+`j',1] = 4*`i' - 3 + `j'
		mat knowledge_g[3*`i'-2+`j',2] = r(estimate)
		mat knowledge_g[3*`i'-2+`j',3] = r(lb)
		mat knowledge_g[3*`i'-2+`j',4] = r(ub)
		
		test training_treatment_`j' 
		local `v'_p_t`j' = r(p)

	}
	
	test training_treatment_1 = training_treatment_2 
	local `v'_p_t1vst2 = r(p)
		
	local i = `i' + 1

}	
	
capture drop knowledge_g*
svmat knowledge_g

*Plot subgraph

twoway (bar knowledge_g2 knowledge_g1 if knowledge_g1 == 1 | knowledge_g1 == 5 | knowledge_g1 == 9, bcolor(ebblue*0.75) barwidth(0.5)) (bar knowledge_g2 knowledge_g1 if knowledge_g1 == 2 | knowledge_g1 == 6 | knowledge_g1 == 10, bcolor(orange*0.5) barwidth(0.5))  (bar knowledge_g2 knowledge_g1 if knowledge_g1 == 3 | knowledge_g1 == 7 | knowledge_g1 == 11, bcolor(orange*0.75) barwidth(0.5)) (rcap knowledge_g3 knowledge_g4 knowledge_g1), legend(off) xmlabel(1 "C" 2 "T"  3 "T + S" 5 "C" 6 "T"  7 "T + S" 9 "C" 10 "T"  11 "T + S", labsize(small) tlength(0))  xlabel(2 `" "Clean accessible parts" "with a stiff brush" "' 6 `""Scrub accessible parts" "with chlorine solution" "'  10 `""Wash interior with" "chlorine solution" "', labsize(vsmall) tlength(0)  labgap(40pt)) subtitle("b) Caretaker knowledge, 17 months", size(small) position(11) span) xtitle("")  ytitle("Share of caretakers listing as best practice", size(small))  xsize(8) ysize(12) yscale(range(0 1) lstyle(none)) ylabel(0(0.2)1, labsize(small) ) yline(0) xscale(range(0.5 11.5) lstyle(none)) plotregion(style(none)) name(knowledge_results, replace)

graph save $path_stata_graphs/knowledge_comparisons_round2, replace

*practice: share regularly practicing each of the three steps

*binarize variables (all who knew of practice reported practicing at least rarely, so focus on whether they reported doing it regularly)

foreach v in freq_brush freq_scrub freq_wash {	
	gen `v'_reg = (`v' == "regularly") 
}

gen n_reg = freq_brush_reg + freq_scrub_reg + freq_wash_reg 


*regressions 
 
local i = 1

foreach v in  freq_brush_reg freq_scrub_reg freq_wash_reg {
	
	reg `v' training_treatment_1 training_treatment_2 [pweight = weight], cluster(tw) level(90)

	test training_treatment_1 = training_treatment_2
	estadd scalar p_1vs2 = r(p)
	estadd scalar control_mean = _b[_cons]

	estimates store e_practice_`i'
	local i = `i' + 1

}

*save results in macros for graphs: means and confidence intervals in matrices, p values in locals

mat practice_g = J(9,4,.)

local i = 1

foreach v in freq_brush_reg freq_scrub_reg freq_wash_reg {
	
	estimates restore e_practice_`i'
	
	mat practice_g[3*`i'-2,1] = 4*`i'-3
	
	mat practice_g[3*`i'-2,2] = _b[_cons]
	mat practice_g[3*`i'-2,3] = _b[_cons] - 1.645 * _se[_cons]
	mat practice_g[3*`i'-2,4] = _b[_cons] + 1.645 * _se[_cons]
	
	forvalues j = 1(1)2 {
 	
		lincom _cons + training_treatment_`j', level(90)
			
		mat practice_g[3*`i'-2+`j',1] = 4*`i' - 3 + `j'
		mat practice_g[3*`i'-2+`j',2] = r(estimate)
		mat practice_g[3*`i'-2+`j',3] = r(lb)
		mat practice_g[3*`i'-2+`j',4] = r(ub)
		
		test training_treatment_`j' 
		local `v'_p_t`j' = r(p)

	}
	
	test training_treatment_1 = training_treatment_2 
	local `v'_p_t1vst2 = r(p)
		
	local i = `i' + 1

}	
 
*save results in macros for graphs: means and confidence intervals in matrices, p values in locals

mat practice_g = J(9,4,.)

local i = 1

foreach v in freq_brush_reg freq_scrub_reg freq_wash_reg {
	
	estimates restore e_practice_`i'
	
	mat practice_g[3*`i'-2,1] = 4*`i'-3
	
	mat practice_g[3*`i'-2,2] = _b[_cons]
	mat practice_g[3*`i'-2,3] = _b[_cons] - 1.645 * _se[_cons]
	mat practice_g[3*`i'-2,4] = _b[_cons] + 1.645 * _se[_cons]
	
	forvalues j = 1(1)2 {
 	
		lincom _cons + training_treatment_`j', level(90)
			
		mat practice_g[3*`i'-2+`j',1] = 4*`i' - 3 + `j'
		mat practice_g[3*`i'-2+`j',2] = r(estimate)
		mat practice_g[3*`i'-2+`j',3] = r(lb)
		mat practice_g[3*`i'-2+`j',4] = r(ub)
		
		test training_treatment_`j' 
		local `v'_p_t`j' = r(p)

	}
	
	test training_treatment_1 = training_treatment_2 
	local `v'_p_t1vst2 = r(p)
		
	local i = `i' + 1

}	
	
capture drop practice_g*
svmat practice_g

*Plot subgraph

twoway (bar practice_g2 practice_g1 if practice_g1 == 1 | practice_g1 == 5 | practice_g1 == 9, bcolor(ebblue*0.75) barwidth(0.5)) (bar practice_g2 practice_g1 if practice_g1 == 2 | practice_g1 == 6 | practice_g1 == 10, bcolor(orange*0.5) barwidth(0.5))  (bar practice_g2 practice_g1 if practice_g1 == 3 | practice_g1 == 7 | practice_g1 == 11, bcolor(orange*0.75) barwidth(0.5)) (rcap practice_g3 practice_g4 practice_g1), legend(off) xmlabel(1 "C" 2 "T"  3 "T + S" 5 "C" 6 "T"  7 "T + S" 9 "C" 10 "T"  11 "T + S", labsize(small) tlength(0))  xlabel(2 `" "Clean accessible parts" "with a stiff brush" "' 6 `""Scrub accessible parts" "with chlorine solution" "'  10 `""Wash interior with" "chlorine solution" "', labsize(vsmall) tlength(0)  labgap(40pt)) subtitle("d) Caretaker practice, 17 months", size(small) position(11) span) xtitle("")  ytitle("Share of caretakers regularly practicing", size(small))  xsize(8) ysize(12) yscale(range(0 1) lstyle(none)) ylabel(0(0.2)1, labsize(small) ) yline(0) xscale(range(0.5 11.5) lstyle(none)) plotregion(style(none)) name(practice_results, replace)

graph save $path_stata_graphs/practice_comparisons_round2, replace


*detaching wells

tab training_treatment bleaching_detached, row 

*those who report knowing something is best practice but not doing it: cite time as a reason for challenge_brush, and now cite lack of resources for chlorine based treatments

foreach v in challenge_brush challenge_scrub challenge_wash {
	tab  `v' training_treatment_num, nolab
}

*most people report having the tools necessary to disassemble the tubewell

tab training_treatment tools_avail, row
gen tools_avail_d = (tools_avail == "yes")

reg tools_avail_d training_treatment_1 training_treatment_2 [pweight = weight], cluster(tw)
 
*now only a minority say it is easy to obtain bleaching powder in the local market

tab training_treatment bleaching_available, row

*No control group caretakers have purchased bleach.  17% of wells have a caretaker who has purchased bleach powder under training only, 7% under training and supplies

gen bleaching_purchased_d = (bleaching_purchased == "yes")

tab training_treatment bleaching_purchased, row
reg bleaching_purchased_d training_treatment_1 training_treatment_2 [pweight = weight], cluster(tw)

* time taken 

*overall 97% report takes one hour

tab training_treatment cleaning_hours, row		
		
*support from community 

*the majority (69%) report receiving support either in labour (most commonly, 57%) or labour and cash (12%).  

tab cleaning_contrib, sort

gen cleaning_contrib_d = (regexm(cleaning_contrib,"yes") == 1)
tab cleaning_contrib_d

*rates are higher in treatment arms, significantly (p < 0.1) for training and supplies

tab training_treatment cleaning_contrib_d, row

reg cleaning_contrib_d training_treatment_1 training_treatment_2 [pweight = weight], cluster(tw)


*the end


